library(rmapshaper)
library(censusapi)
library(shapefiles)
library(base)
library(tidyverse)
library(tigris)
library(sf)
library(mapview)
library(raster)
library(rlang)
library(rgeos)
library(data.table)
library(measurements)
library(smoothr)
library(lwgeom)
library(units)
library(geosphere)
library(kableExtra)
library(leaflet)
library(htmltools)
library(plotly)
library(osrm)
library(tsviz)
library(sp)
mapviewOptions(
basemaps = "OpenStreetMap"
)
options(
tigris_class = "sf",
tigris_use_cache = TRUE,
osrm.server = "http://127.0.0.1:5000/",
osrm.profile = "walk"
)
Sys.setenv(CENSUS_KEY="b88495f8315f45b2d100dee9ba8f4c489a7371c2")
projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
`%notin%` <- Negate(`%in%`)
Does the location of where one lives impact the ability to enjoy outdoor green space? Does the size of the yard at your house play a factor in whether you regularly visit parks? Are communities under-visiting parks that are physically closer to them? The goal of this analysis is to provide insight on park accessibility within communities in the City of San Jose. We will consider factors such as backyard size, park visit frequency, and distance from a park to determine a community’s park accessibility.
The first few sections describe our methodological approaches for preparing the factors for analysis, followed by sections detailing the comparisons between factors and the resulting conclusions.
The following describes how the Safegraph data is processed:
Safegraph collects device visit information to “places of interest” (POI) like parks or grocery stores and includes some information about a device’s origin census block group. The visit counts in the Safegraph dataset fall into either one of these three cases:
Note: We obtained all of the data for the park visits portion of the analysis from Safegraph Patterns Data.
Our goal for this section is to obtain the residential yard size per capita for each census block group in San Jose. We start with parcel data from the Santa Clara County Assessor’s Office, and filter down to only include parcels with residential land use codes. The following list details the land use codes we used for this analysis:
After filtering to parcels with these residential use codes, we spatially match the building footprint shape associated with that parcel. To get from parcel area to yard area, we subtract the building footprint shape out of the parcel shape.
#load("P:/SFBI/Data Library/OSM/osm_bldg.Rdata")
#
#osm_bldg <-
# osm_bldg %>% st_transform(projection)
sj_boundary <-
places("CA", cb=FALSE) %>%
filter(NAME == "San Jose") %>%
st_transform(projection)
#
sj_blockgroup <-
readRDS("sj_blockgroups.rds") %>%
st_transform(projection)
#mapview(sj_blockgroup)
#load("P:/SFBI/Data Library/California Blocks/ca_blocks_lite.Rdata")
#ca_blocks_lite <- ca_blocks_lite %>% st_transform(projection)
#sj_blocks <- ca_blocks_lite[sj_boundary,]
#save(sj_blocks, file = "sj_blocks.Rdata")
#load("sj_blocks.Rdata")
#mapview(sj_blocks[100,])
# sj_bldg <-
# osm_bldg[sj_boundary,] %>%
# transmute(
# index = row_number()
# )
#save(sj_bldg, file = "sj_bldg.Rdata")
load("sj_bldg.Rdata")
# scc_parcels <- st_read("P:/SFBI/Data Library/Parcels/santa clara/scc.shp")
#
# scc_parcels <-
# scc_parcels %>%
# st_transform(projection)
#
# This includes parcels parcels at the periphery of San Jose. This is important for the "block method" used to find street edges. otherwise this sf object isn't used.
# sj_parcels_plus <-
# scc_parcels[sj_blocks,] %>%
# dplyr::select(APN, USE_CODE, SITUS_CITY)
# #
# sj_parcels <-
# sj_parcels_plus %>%
# filter(SITUS_CITY == "SAN JOSE")
# #
# save(sj_parcels_plus, sj_parcels, file = "sj_parcels.Rdata")
load("sj_parcels.Rdata")
# apn_bg_lookup <- data.frame(APN = NA, origin_census_block_group = NA)
#
# for(i in 1:nrow(sj_blockgroup)){
# if(i %% 10 == 0){print(i)}
# temp <- sj_blockgroup[i,]
# temp2 <- sj_parcels[temp,] %>%
# dplyr::select(APN) %>%
# st_set_geometry(NULL) %>%
# mutate(
# origin_census_block_group = temp$origin_census_block_group
# )
# apn_bg_lookup <-
# apn_bg_lookup %>%
# rbind(temp2)
# }
#
# apn_bg_lookup <-
# apn_bg_lookup %>%
# na.omit()
#
# saveRDS(apn_bg_lookup,"sj_apn_bg_lookup.rds")
apn_bg_lookup <- readRDS("sj_apn_bg_lookup.rds")
sj_parcels_residential <-
sj_parcels %>%
mutate(
USE_CODE = as.numeric(USE_CODE)
) %>%
filter(USE_CODE %in% c(1,2,3,4,6,7))
#mapview(head(sj_parcels_residential,100))
matched_bldg_footprint <-
sj_bldg %>%
st_centroid() %>%
st_intersection(sj_parcels_residential) %>%
st_set_geometry(NULL) %>%
left_join(sj_bldg, by = "index") %>%
st_as_sf()
#
#
residential_parcels <-
sj_parcels_residential %>%
mutate(
parcel_area = st_area(.) %>% as.numeric()
) %>%
as.data.frame() %>%
st_as_sf() %>%
rename(parcel_geometry = geometry)
#
#
residential_bldg <-
residential_parcels %>%
as.data.frame() %>%
right_join(
matched_bldg_footprint %>% dplyr::select(APN),
by = "APN"
) %>%
filter(!is.na(parcel_area)) %>%
rename(building_geometry = geometry) %>%
st_as_sf() %>%
st_set_geometry("building_geometry")
#
#
residential_parcels <-
residential_bldg %>%
st_set_geometry("parcel_geometry")
#
residential_parcels <- residential_parcels[!duplicated(residential_parcels), ]
##############################################################
# available_yard <-
# st_difference(
# residential_parcels[1:10000,],
# st_union(
# residential_parcels$building_geometry[1:10000,])
# )
#
# available_yard2 <-
# st_difference(
# residential_parcels[10001:20000,],
# st_union(
# residential_parcels$building_geometry[10001:20000,])
# )
#
# available_yard3 <-
# st_difference(
# residential_parcels[20001:30000,],
# st_union(
# residential_parcels$building_geometry[20001:30000,])
# )
# #
# available_yard4 <-
# st_difference(
# residential_parcels[30001:40000,],
# st_union(
# residential_parcels$building_geometry[30001:40000,])
# )
#
# available_yard5 <-
# st_difference(
# residential_parcels[40001:50000,],
# st_union(
# residential_parcels$building_geometry[40001:50000,])
# )
#
# available_yard6 <-
# st_difference(
# residential_parcels[50001:60000,],
# st_union(
# residential_parcels$building_geometry[50001:60000,])
# )
#
# available_yard7 <-
# st_difference(
# residential_parcels[60001:70000,],
# st_union(
# residential_parcels$building_geometry[60001:70000,])
# )
#
# available_yard8 <-
# st_difference(
# residential_parcels[70001:80000,],
# st_union(
# residential_parcels$building_geometry[70001:80000,])
# )
#
# available_yard9 <-
# st_difference(
# residential_parcels[80001:90000,],
# st_union(
# residential_parcels$building_geometry[80001:90000,])
# )
#
# available_yard10 <-
# st_difference(
# residential_parcels[90001:100000,],
# st_union(
# residential_parcels$building_geometry[90001:100000,])
# )
#
# available_yard10 <-
# st_difference(
# residential_parcels[100001:nrow(residential_parcels),],
# st_union(
# residential_parcels$building_geometry[100001:nrow(residential_parcels),])
# )
# all_available_yard <-
# available_yard %>%
# rbind(available_yard2) %>%
# rbind(available_yard3) %>%
# rbind(available_yard4) %>%
# rbind(available_yard5) %>%
# rbind(available_yard6) %>%
# rbind(available_yard7) %>%
# rbind(available_yard8) %>%
# rbind(available_yard9) %>%
# rbind(available_yard10)
#save(all_available_yard, file = "sj_all_yard_available.Rdata")
load("sj_all_yard_available.Rdata")
all_available_yard_other <-
all_available_yard %>%
filter(USE_CODE == 2 | USE_CODE == 3)
all_available_yard_apartment <-
all_available_yard %>%
filter(USE_CODE == 4)
all_available_yard_other2 <-
all_available_yard %>%
filter(USE_CODE %in% c(6,7))
available_area_fun <- function(df, residential_parcels){
df %>%
as.data.frame() %>%
rename(geometry = parcel_geometry) %>%
left_join(residential_parcels %>% dplyr::select(APN), by = "APN") %>%
st_as_sf() %>%
st_set_geometry("building_geometry") %>%
as_Spatial() %>%
gBuffer(byid = T, width = 0) %>%
sp::disaggregate() %>%
st_as_sf() %>%
rename(available_area_geometry = geometry) %>%
st_set_geometry("available_area_geometry") %>%
mutate(
available_area = round(st_area(.) %>% as.numeric())
) %>%
st_set_crs(projection)
}
# save(available_area, file = "sj_available_backyard_area.Rdata")
#!!!!This .Rdata file constians all of the yard areas for parcels zoned as single-family residential!!!!
load("sj_available_backyard_area.Rdata")
# PUC 2,3
available_area1_other <- available_area_fun(all_available_yard_other, residential_parcels)
available_area1_other <-
available_area1_other %>%
dplyr::select(APN, available_area, available_area_geometry)
#PUC 6,7
available_area2_other <- available_area_fun(all_available_yard_other2, residential_parcels)
available_area2_other <-
available_area2_other %>%
dplyr::select(APN, available_area, available_area_geometry)
available_area_apartment <-
all_available_yard_apartment %>%
mutate(
bldg_area = st_area(building_geometry)
) %>%
group_by(APN, parcel_area) %>%
summarize(bldg_area_sum = sum(as.numeric(bldg_area))) %>%
mutate(available_area = parcel_area - bldg_area_sum)
available_area_apartment <-
available_area_apartment %>%
rename(available_area_geometry = building_geometry) %>%
dplyr::select(APN,available_area,available_area_geometry) %>%
mutate(
available_area = ifelse(available_area < 0,0,available_area)
)
#combining all PUC dataframes
available_area_combo <-
available_area %>%
rbind(available_area1_other) %>%
rbind(available_area2_other) %>%
rbind(available_area_apartment)
nix_list <- c("69403028","41425018","67013004","67013003","67029024","74207011")
nix_df <-
available_area %>%
filter(APN %in% nix_list) %>%
left_join(
all_available_yard %>%
st_set_geometry(NULL) %>%
dplyr::select(APN, USE_CODE) %>%
filter(APN %in% nix_list),
by = "APN"
)
available_area_bg <-
available_area_combo %>%
left_join(
apn_bg_lookup %>%
filter(APN %in% available_area$APN),
by = "APN"
) %>%
filter(APN %notin% nix_list)
#
bg_area_avg <-
available_area_bg %>%
st_set_geometry(NULL) %>%
group_by(origin_census_block_group) %>%
summarise(yard_area_avg = sum(available_area))
#
extra_bg <-
sj_blockgroup %>%
filter(origin_census_block_group %notin% bg_area_avg$origin_census_block_group) %>%
st_set_geometry(NULL) %>%
dplyr::select(origin_census_block_group) %>%
mutate(yard_area_avg = 0)
#
bg_area_avg_all <-
bg_area_avg %>%
rbind(extra_bg) %>%
left_join(
sj_blockgroup %>%
dplyr::select(-DISTRICTS),
by = "origin_census_block_group"
) %>%
na.omit() %>%
st_as_sf() %>%
st_transform(4326)
#save(bg_area_avg_all, file = "sj_bg_avg_yard_area.Rdata")
load("sj_bg_avg_yard_area.Rdata")
sj_population <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = c("group(B01003)")
) %>%
mutate(
origin_census_block_group =
paste0(state,county,tract,block_group)
) %>%
rename(
total_pop = "B01003_001E"
) %>%
dplyr::select(total_pop, origin_census_block_group)
sj_yard_pop_weight <-
bg_area_avg_all %>%
left_join(
sj_population %>%
filter(origin_census_block_group %in% bg_area_avg_all$origin_census_block_group),
by = "origin_census_block_group"
) %>%
mutate(
yard_per_capita = yard_area_avg / total_pop
)
The following map shows the void building footprint within the parcel shape for one block group. The resulting shape represents the yard area for each parcel.
example_bg_yard <-
available_area_bg %>%
filter(origin_census_block_group == "060855006002")
mapview(example_bg_yard)
In order to align our geographic granularity with the other factors of our analysis, we take the sum of all of the yard sizes within a block group. This value is then divided by the block group population to obtain the yard area per capita. The following map shows the yard area per capita sizes for all of the block groups in San Jose. Notice that the largest values occur near the left border of the city, near the hills.
bg_area_avg_minus <-
sj_yard_pop_weight
blue_pal2 <- colorNumeric(
palette = colorRamp(c("#FFFFFF", "#00288c"), interpolate="spline"),
domain =
bg_area_avg_minus %>%
pull(yard_per_capita) %>%
unique()
)
yard_bg_map_minus <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>% st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = bg_area_avg_minus,
fillColor = ~blue_pal2(yard_per_capita),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
#group = "Average Daily Park Visits",
label = ~paste0("Block Group Average Yard Size per Capita: ",round(yard_per_capita), " square feet"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)) %>%
addLegend(
position = "topright",
pal = blue_pal2,
values = bg_area_avg_minus$yard_per_capita,
title = "Avg. Yard Area per Capita (sqft.)",
opacity = 1
)
yard_bg_map_minus
Note: The data from the Santa Clara County assessor’s office likely includes multiple zoning misclassifications. For example, the following parcel has a Property Use Code of 1 (single-family residential), but it is actually a water treatment plant. This “yard” (that has an area of ~one million square feet) was inflating the yard size for the block group in which it is located.
mapview(nix_df[7,])
We did a visual inspection of the single-family residential parcels in the block groups with large average yard sizes (over 30,000 sqft.), and removed the parcels that appeared to be misclassified. Keep in mind that there still may be other misclassified parcels incorporated into the block group average yard size.
Some overall San Jose residential yard size statistics:
Mean San Jose Yard Area per Capita: 386 sq. ft.
Standard Deviation: 660 sq. ft.
Minimum San Jose Yard Size per Capita: 0 sq. ft.
Maximum San Jose Yard Size per Capita: 6300 sq. ft.
For now, I will put a hold on this analysis and move to the average number of people who visit San Jose parks that reside in these block groups. At the end, we will come back to these yard areas to compare against other accessibility factors.
When looking at park visitors who live in specific census block groups (CBGs), it is not sufficient to consider the CBG visit numbers at face value. The population of each block group plays an important factor in weighing a community’s visits to parks. For example, if a CBG has 3 residents, and they have an average of 3 daily park visits, then we can infer that this community has very active park visitors (even though the 3 park visits may seem low).
The following map displays all of the census block groups we considered in this analysis (see the Safegraph Data Collection Explanation section above for why some CBGs are missing). The “Park Visits” option illustrates the average daily number of visits that each CBG had to a SJ park since January 1, 2020. The “Average Monthly Park Visits per Person” option provides more insight into the monthly frequency that community members visits parks.
####periodically update from sanjose_park_process file
park_origins <- readRDS("sj_park_origins.rds") %>%
filter(origin_census_block_group %in% sj_blockgroup$origin_census_block_group) %>%
mutate(month = month(date %>% as.Date()))
# park_origins_monthly <-
# park_origins %>%
# group_by(origin_census_block_group,month) %>%
# summarise(month_total = sum(mean_origin_visits)) %>%
# filter(month != 8)
park_geometry <-
readRDS('park_output_geometry.rds') %>%
st_as_sf() %>%
st_transform(projection)
park_origin_mean <-
park_origins %>%
group_by(origin_census_block_group) %>%
summarise(mean_visits = mean(mean_origin_visits))
visit_pop_combo <-
sj_population %>%
filter(origin_census_block_group %in% park_origin_mean$origin_census_block_group) %>%
left_join(
park_origin_mean,
by = "origin_census_block_group"
) %>%
na.omit() %>%
mutate(
visits_per_pop = round((mean_visits/total_pop) * 30,2)
) %>%
left_join(
sj_blockgroup %>%
dplyr::select(-DISTRICTS),
by = "origin_census_block_group"
) %>%
st_as_sf() %>%
st_transform(projection)
orange_pal <- colorNumeric(
palette = colorRamp(c("#fcd786", "#a67100"), interpolate="spline"),
domain =
visit_pop_combo %>%
pull(mean_visits) %>%
unique()
)
teal_pal <- colorNumeric(
palette = colorRamp(c("#d6fffe", "#015a68"), interpolate="spline"),
domain =
visit_pop_combo %>%
pull(visits_per_pop) %>%
unique()
)
all_visitors_map <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>% st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = visit_pop_combo %>%
st_transform(4326),
fillColor = ~orange_pal(mean_visits),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "Average Daily San Jose Park Visits",
label = ~paste0(round(mean_visits)," average daily park visits"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)) %>%
addPolygons(
data = visit_pop_combo %>%
st_transform(4326),
fillColor = ~teal_pal(visits_per_pop),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "Average Monthly Park Visits per Person",
label = ~paste0(visits_per_pop," average monthly park visits per person"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addLayersControl(
baseGroups = c("Average Daily San Jose Park Visits", "Average Monthly Park Visits per Person"),
options = layersControlOptions(collapsed = FALSE)
)
all_visitors_map
Some interesting results to highlight:
The block group that has both the highest average daily park visits and highest average monthly park visits per person is shown in the following view. Interestingly, this CBG contains the Santa Clara County jail and police office. Approximately 60% of the “residents” of this block group visit a San Jose park everyday. But who are these visitors? Inmates are unlikely to have a cellular device. After discussing this point with Stanford’s RegLab, we developed the hypothesis that there may be certain devices (tablets or phones) that police and correction officers use during the workday, but leave at the office during the night (which becomes the device’s home).
all_visitors_map %>%
setView(-121.906408, 37.350420, zoom = 15)
An additional block group that stands out is one located near Emma Prusch Farm Park. This CBG contains mainly an on-ramp, a portion of Bayshore Freeway, and a storage center. In the “Average Daily Park Visits” view, this CBG does not seem to have any significance. But when you switch to the “Average Monthly Park Visits per Person” view, the CBG shows to have the second highest visits/population ratio in all of San Jose (although the daily park visit average is only 7, each person visits a park on average ~15 times per month). This may be an instance of a homeless encampment underneath or near the freeway on-ramp, whose residents frequent the nearby Emma Prusch Farm Park.
homeless <-
all_visitors_map %>%
showGroup("Average Park Visits per 1000") %>%
setView(-121.847157, 37.335857, zoom = 15)
homeless